Copernicus for Sanctuaries

Code
librarian::shelf(
  dplyr, DT, ggplot2, glue, here, jsonlite, leaflet, mapview, ncdf4, readr, reticulate, 
  sf, stringr, terra, tidyr,
  quiet = T)
options(readr.show_col_types = F)

sanctuaries_geo <- here("data/sanctuaries.geojson")
# copernicusmarine username and password
user        <- "bbest1"
pass_txt    <- "~/My Drive/private/data.marine.copernicus.eu_bbest1-password.txt"
dir_cm      <- here("data/copernicus")
results_csv <- here("data/copernicus.csv")

Sanctuaries

We are extracting Copernicus Marine data from across National Marine Sanctuaries, borrowing polygons from the noaa-onms/climate-dashboard (see Github code).

Code
if (!file.exists(sanctuaries_geo)){
  # source: https://github.com/noaa-onms/climate-dashboard/blob/main/data/sanctuaries.rds
  sanctuaries_rds <- here("../climate-dashboard/data/sanctuaries.rds")
  readRDS(sanctuaries_rds) |> 
    filter(nms != "TBNMS") |> # exclude Great Lakes
    write_sf(sanctuaries_geo, delete_dsn = T)
}

sanctuaries <- read_sf(sanctuaries_geo)
sanctuaries |> 
  st_drop_geometry() |> 
  datatable()
Code
mapView(sanctuaries)

Setup for copernicusmarine in R

Code
# do once: create virtual enviroment and install copernicusmarine Python module
# virtualenv_create(envname = "CopernicusMarine")
# virtualenv_install(envname = "CopernicusMarine", packages = c("copernicusmarine"))

# TODO: check for CopernicusMarine env with copernicusmarine Python module

# use virtualenv and reticulate::import copernicusmarine Python module
use_virtualenv(virtualenv = "CopernicusMarine", required = TRUE)
cmt <- import("copernicusmarine")

# login
pass <- readLines(pass_txt)
cmt$login(user, pass, skip_if_user_logged_in = T) # py_help(cmt$login)
[1] TRUE
Code
# writes to /Users/bbest/.copernicusmarine/.copernicusmarine-credentials

Extract Global Ocean Physics Reanalysis

Product > Dataset > Variable

Variables, keep:

  • thetao: Sea water potential temperature [°C]
  • bottomT: Sea water potential temperature at sea floor [°C]
  • so: Sea water salinity [/ 103]
  • mlotst: Ocean mixed layer thickness defined by sigma theta [m]

Variables, skip:

  • siconc: Sea ice area fraction
  • sithick: Sea ice thickness [m]
  • usi: Eastward sea ice velocity [m/s]
  • vsi: Northward sea ice velocity [m/s]
  • uo: Eastward sea water velocity [m/s]
  • vo: Northward sea water velocity [m/s]
  • zos: Sea surface height above geoid [m]

Setup dataset parameters

Code
# py_help(cmt$describe)
# cat_anfc = cmt$describe(
#   contains             = list("GLOBAL_ANALYSISFORECAST_PHY_001_024"), 
#   include_datasets     = T,
#   disable_progress_bar = T)
# cat_rean = cmt$describe(
#   contains             = list("GLOBAL_MULTIYEAR_PHY_001_030", "bottomT"), 
#   include_datasets     = T,
#   disable_progress_bar = T)
# View(cat_rean)

datasets <- list(
  "cmems_mod_glo_phy_my_0.083deg_P1M-m" = list(    # reanalysis monthly
    vars      = list("thetao", "bottomT", "so", "mlotst"),
    date_rng  = c("1993-01-01T00:00:00","2021-06-01T00:00:00"),
    depth_rng =  c(0, 0.5)), # get surface layer only
  "cmems_mod_glo_phy_myint_0.083deg_P1M-m" = list(  # reanalysis monthly, interim
    vars      = list("thetao", "bottomT", "so", "mlotst"),
    date_rng  = c("2021-07-01T00:00:00","2024-12-01T00:00:00"),
    depth_rng =  c(0, 0.5))) # get surface layer only
datasets |> 
  toJSON(auto_unbox = T, pretty=T)
{
  "cmems_mod_glo_phy_my_0.083deg_P1M-m": {
    "vars": [
      "thetao",
      "bottomT",
      "so",
      "mlotst"
    ],
    "date_rng": ["1993-01-01T00:00:00", "2021-06-01T00:00:00"],
    "depth_rng": [0, 0.5]
  },
  "cmems_mod_glo_phy_myint_0.083deg_P1M-m": {
    "vars": [
      "thetao",
      "bottomT",
      "so",
      "mlotst"
    ],
    "date_rng": ["2021-07-01T00:00:00", "2024-12-01T00:00:00"],
    "depth_rng": [0, 0.5]
  }
} 

Iterate over sanctuaries & datasets to subset

Code
for (i in 1:nrow(sanctuaries)){ # i = 1

  d_s <- slice(sanctuaries, i)
  bb <- d_s |> 
    st_buffer(9.25*1000) |> # buffer by a pixel width 1/12° ~ 9.25 km at equator
    st_bbox()

  dir_out <- here(glue("{dir_cm}/{d_s$nms}"))
  dir.create(dir_out, showWarnings=F, recursive=T)
  message(glue("{i}/{nrow(sanctuaries)}: {d_s$sanctuary} ({d_s$nms})"))
  
  for (j in 1:length(datasets)){ # j=1
    
    ds_id <- names(datasets)[j]
    ds    <- datasets[[j]]
    message(glue("{j}/{length(datasets)}: {ds_id}"))
    
    # py_help( cmt$subset)
    nc <- cmt$subset(
      dataset_id            = ds_id,
      # dataset_version     = "202309",
      variables             = ds$vars,
      minimum_longitude     = bb$xmin,
      maximum_longitude     = bb$xmax,
      minimum_latitude      = bb$ymin,
      maximum_latitude      = bb$ymax,
      start_datetime        = ds$date_rng[1],
      end_datetime          = ds$date_rng[2],
      minimum_depth         = ds$depth_rng[1],
      maximum_depth         = ds$depth_rng[2],
      output_directory      = dir_out,
      force_download        = T,
      overwrite_output_data = T)
    nc <- as.character(nc)
    
    # show output_filename with parameters embedded
    # cat(glue("output_filename (basename): {basename(nc)}"))
  }
}

Show raster

Grabbing first netcdf file, show full metadata on variables and attributes:

Code
nms = "FKNMS"
ply <- sanctuaries |> 
  filter(nms == !!nms)
nc <- list.files(glue("data/copernicus/{nms}"), full.names = T)[1]

o <- nc_open(nc)
o
File data/copernicus/FKNMS/cmems_mod_glo_phy_my_0.083deg_P1M-m_multi-vars_83.25W-80.00W_24.25N-25.75N_0.49m_1993-01-01-2021-06-01.nc (NC_FORMAT_NETCDF4):

     4 variables (excluding dimension variables):
        short thetao[longitude,latitude,depth,time]   (Contiguous storage)  
[1] ">>>> WARNING <<<  attribute valid_max is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
[1] ">>>> WARNING <<<  attribute valid_min is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
            _FillValue: -32767
            cell_methods: area: mean
            long_name: Temperature
            standard_name: sea_water_potential_temperature
            unit_long: Degrees Celsius
            units: degrees_C
            valid_max: 22600
            valid_min: -32767
            add_offset: 21
            scale_factor: 0.000732444226741791
        short bottomT[longitude,latitude,time]   (Contiguous storage)  
[1] ">>>> WARNING <<<  attribute valid_max is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
[1] ">>>> WARNING <<<  attribute valid_min is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
            _FillValue: -32767
            cell_methods: area: mean
            long_name: Sea floor potential temperature
            standard_name: sea_water_potential_temperature_at_sea_floor
            unit_long: Degrees Celsius
            units: degrees_C
            valid_max: 22600
            valid_min: -32767
            add_offset: 21
            scale_factor: 0.000732444226741791
        short so[longitude,latitude,depth,time]   (Contiguous storage)  
[1] ">>>> WARNING <<<  attribute valid_max is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
[1] ">>>> WARNING <<<  attribute valid_min is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
            _FillValue: -32767
            cell_methods: area: mean
            long_name: Salinity
            standard_name: sea_water_salinity
            unit_long: Practical Salinity Unit
            units: 1e-3
            valid_max: 31700
            valid_min: 1
            add_offset: -0.00152592547237873
            scale_factor: 0.00152592547237873
        short mlotst[longitude,latitude,time]   (Contiguous storage)  
[1] ">>>> WARNING <<<  attribute valid_max is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
[1] ">>>> WARNING <<<  attribute valid_min is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
            _FillValue: -32767
            cell_methods: area: mean
            long_name: Density ocean mixed layer thickness
            standard_name: ocean_mixed_layer_thickness_defined_by_sigma_theta
            unit_long: Meters
            units: m
            valid_max: 23400
            valid_min: 1
            add_offset: -0.152592554688454
            scale_factor: 0.152592554688454

     4 dimensions:
        depth  Size:1 
            axis: Z
            long_name: Depth
            positive: down
            standard_name: depth
            unit_long: Meters
            units: m
            valid_max: 0.494024991989136
            valid_min: 0.494024991989136
        latitude  Size:19 
            axis: Y
            long_name: Latitude
            standard_name: latitude
            step: 0.0833358764648438
            unit_long: Degrees North
            units: degrees_north
            valid_max: 25.75
            valid_min: 24.25
        longitude  Size:40 
            axis: X
            long_name: Longitude
            standard_name: longitude
            step: 0.0833282470703125
            unit_long: Degrees East
            units: degrees_east
            valid_max: -80
            valid_min: -83.25
        time  Size:342 
[1] ">>>> WARNING <<<  attribute valid_min is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
[1] ">>>> WARNING <<<  attribute valid_max is an 8-byte value, but R"
[1] "does not support this data type. I am returning a double precision"
[1] "floating point, but you must be aware that this could lose precision!"
            valid_min: 376944
            valid_max: 626016
            units: hours since 1950-01-01
            calendar: gregorian

    15 global attributes:
        Conventions: CF-1.6
        area: GLOBAL
        contact: servicedesk.cmems@mercator-ocean.eu
        credit: E.U. Copernicus Marine Service Information (CMEMS)
        dataset: global-reanalysis-001-030-monthly
        institution: Mercator Ocean
        licence: http://marine.copernicus.eu/services-portfolio/service-commitments-and-licence/
        producer: CMEMS - Global Monitoring and Forecasting Centre
        product: GLOBAL_REANALYSIS_001_030
        product_user_manual: http://marine.copernicus.eu/documents/PUM/CMEMS-GLO-PUM-001-030.pdf
        quality_information_document: http://marine.copernicus.eu/documents/QUID/CMEMS-GLO-QUID-001-030.pdf
        references: http://marine.copernicus.eu
        source: MERCATOR GLORYS12V1
        title: Monthly mean fields for product GLOBAL_REANALYSIS_PHY_001_030
        copernicusmarine_version: 1.3.2

Using terra::rast() to read the netcdf file, show first layer:

Code
r <- rast(nc)
r
class       : SpatRaster 
dimensions  : 19, 40, 1368  (nrow, ncol, nlyr)
resolution  : 0.08333333, 0.08333333  (x, y)
extent      : -83.29167, -79.95833, 24.20833, 25.79167  (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 
sources     : cmems_mod_glo_phy_my_0.083deg_P1M-m_multi-vars_83.25W-80.00W_24.25N-25.75N_0.49m_1993-01-01-2021-06-01.nc:thetao  (342 layers) 
              cmems_mod_glo_phy_my_0.083deg_P1M-m_multi-vars_83.25W-80.00W_24.25N-25.75N_0.49m_1993-01-01-2021-06-01.nc:bottomT  (342 layers) 
              cmems_mod_glo_phy_my_0.083deg_P1M-m_multi-vars_83.25W-80.00W_24.25N-25.75N_0.49m_1993-01-01-2021-06-01.nc:so  (342 layers) 
              cmems_mod_glo_phy_my_0.083deg_P1M-m_multi-vars_83.25W-80.00W_24.25N-25.75N_0.49m_1993-01-01-2021-06-01.nc:mlotst  (342 layers) 
varnames    : thetao (Temperature) 
              bottomT (Sea floor potential temperature) 
              so (Salinity) 
              ...
names       : theta~499_1, theta~499_2, theta~499_3, theta~499_4, theta~499_5, theta~499_6, ... 
unit        :   degrees_C,   degrees_C,   degrees_C,   degrees_C,   degrees_C,   degrees_C, ... 
time        : 1993-01-01 to 2021-06-01 UTC 
Code
i <- 1
cat(glue("
  index: {i}
  name: {names(r)[i]}
  time: {time(r[[i]])}
  varname: {varnames(r[[i]])}
  longname: {longnames(r[[i]])}"))
index: 1
name: thetao_depth=0.49402499_1
time: 1993-01-01
varname: thetao
longname: Temperature

Plot the raster with the sanctuary:

Code
plet(
  r[[i]], 
  # main  = glue("{longnames(r[[i]])}\n{names(r)[i]}\n{time(r[[i]])}"), 
  main  = glue("{longnames(r[[i]])}\nthetao_depth=0.5\n{time(r[[i]])}"), 
  tiles = "Esri.OceanBasemap") |> 
  addPolygons(
    data        = ply,
    fillOpacity = 0.2)

Summarize raster to sanctuary

Code
extract_tbl <- function(r, p, stat = "mean"){
  terra::extract(r, p, fun = stat, na.rm=T) |>
    pivot_longer(!ID, names_to = "var_it") |> 
    mutate(
      var  = str_replace(var_it, "_[0-9]+$", ""),
      stat = !!stat,
      time = time(r)) |> 
    select(var, time, stat, value)
}

ncs <- list.files("data/copernicus", "\\.nc$", recursive = T, full.names = T)

for (i in 1:length(ncs)){ # i=9
  nc    <- ncs[i]
  nms   <- str_replace(nc, ".*/copernicus/([A-z-]+)/(.+)_multi-vars.*", "\\1")
  ds_id <- str_replace(nc, ".*/copernicus/([A-z-]+)/(.+)_multi-vars.*", "\\2")
  message(glue("{i}/{length(ncs)}: nms:{nms}, ds:{ds_id}"))
  
  r <- rast(nc)
  # plet(r[[1]])
  p <- sanctuaries |> 
    filter(nms == !!nms)
  
  d <- bind_rows(
    extract_tbl(r, p, "mean"),
    extract_tbl(r, p, "sd")) |> 
    mutate(
      nms     = !!nms,
      dataset = !!ds_id) |> 
    relocate(nms, dataset)
  
  if (file.exists(results_csv)){
    read_csv(results_csv) |> 
      bind_rows(d) |>
      group_by(nms, dataset, var, time, stat) |>
      summarize(
        value   = last(value),
        .groups = "drop") |>
      arrange(nms, dataset, var, time, stat) |>
      write_csv(results_csv)
  } else {
    write_csv(d, results_csv)
  }
}
Code
results_url <- results_csv |> 
  str_replace(
    here(), "https://github.com/noaa-onms/eco-indicators/blob/main")

# show table
read_csv(results_csv)
# A tibble: 46,080 × 6
   nms   dataset                           var   time                stat  value
   <chr> <chr>                             <chr> <dttm>              <chr> <dbl>
 1 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-01-01 00:00:00 mean   5.33
 2 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-01-01 00:00:00 sd     3.81
 3 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-02-01 00:00:00 mean   5.30
 4 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-02-01 00:00:00 sd     3.83
 5 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-03-01 00:00:00 mean   5.19
 6 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-03-01 00:00:00 sd     3.70
 7 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-04-01 00:00:00 mean   4.89
 8 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-04-01 00:00:00 sd     3.38
 9 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-05-01 00:00:00 mean   4.76
10 CBNMS cmems_mod_glo_phy_my_0.083deg_P1… bott… 1993-05-01 00:00:00 sd     3.15
# ℹ 46,070 more rows

table of results: copernicus.csv

Generate timeseries plot per sanctuary

Code
d <- read_csv(results_csv)
# table(d$var)
# bottomT  mlotst  so_depth=0.49402499  thetao_depth=0.49402499 
#   12288   12288                12288                    12288

plot_var_facets <- function(nms, data){
  png <- glue("figures/copernicus/{nms}.png")
  data |> 
    filter(
      nms == !!nms) |> 
    mutate(
      var = str_replace(var, "_depth=0.49402499", "_0.5m")) |> 
    pivot_wider(
      names_from = stat,
      values_from = value) |> 
    ggplot(aes(x = time, y = mean, color = var)) + 
    facet_wrap(~ var) +
    geom_line() + 
    geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, fill = var), color = NA, alpha = 0.2) + 
    theme(legend.position = "none") + 
    facet_wrap(~var, scales = "free_y") + 
    labs(
      title = nms,
      x = "Time", 
      y = "Value")
  ggsave(png, width = 8, height = 6)
}

sapply(sanctuaries$nms, plot_var_facets, data = d)

Show timeseries per sanctuary